Project

Mapping of Ki67 interactions with the genome and comparison with lamina interactions.

Introduction

Various analyses of HCT116 cells before and after Ki67 loss using a Ki67-AID degron system.

In this document, specifically replication timing.

Method

NA

Set-up

Set the parameters and list the data.

# Load dependencies - without warnings / messages
library(tidyverse)
library(GenomicRanges)
library(rtracklayer)
library(ggplot2)
library(RColorBrewer)
library(GGally)
library(corrr)
library(caTools)
library(ggrastr)
library(colorspace)

# Prepare output 
output_dir <- "ts220503_8_hct116_ki67_aid_replication_timing"
dir.create(output_dir, showWarnings = FALSE)


# Load input
chromosomes <- c(paste0("chr", 1:22), "chrX")


input_dir <- "ts220503_1_data_gathering"

bin_size <- readRDS(file.path(input_dir, "bin_size.rds"))
centromeres <- readRDS(file.path(input_dir, "centromeres.rds"))

colors_set1 <- readRDS(file.path(input_dir, "colors_set1.rds"))
colors_set2 <- readRDS(file.path(input_dir, "colors_set2.rds"))
colors_set3 <- readRDS(file.path(input_dir, "colors_set3.rds"))

tib_padamid_replicates <- readRDS(file.path(input_dir, 
                                            "tib_padamid_replicates.rds"))
tib_padamid_combined <- readRDS(file.path(input_dir, 
                                          "tib_padamid_combined.rds"))

gr_padamid_replicates <- readRDS(file.path(input_dir, 
                                           "gr_padamid_replicates.rds"))
gr_padamid_combined <- readRDS(file.path(input_dir, 
                                         "gr_padamid_combined.rds"))

tib_hmm_replicates <- readRDS(file.path(input_dir, "tib_hmm_replicates.rds"))
tib_hmm_combined <- readRDS(file.path(input_dir, "tib_hmm_combined.rds"))

padamid_metadata_replicates <- readRDS(file.path(input_dir, 
                                                 "padamid_metadata_replicates.rds"))
padamid_metadata_combined <- readRDS(file.path(input_dir, 
                                               "padamid_metadata_combined.rds"))



# Prepare seqnames
chrom_sizes <- tibble(seqnames = seqlevels(gr_padamid_combined),
                      length = seqlengths(gr_padamid_combined)) %>%
  arrange(-length)


# Scale pA-DamID scores?
tib_padamid_combined_scaled <- tib_padamid_combined %>%
  mutate_at(4:ncol(.), function(x) scale(x)[, 1]) %>%
  filter(seqnames != "chrY")
library(knitr)
opts_chunk$set(fig.width = 10, fig.height = 4, cache = T,
               dev=c('png', 'pdf'), fig.path = file.path(output_dir, "figures/")) 
pdf.options(useDingbats = FALSE)
# From Fede:
# ggpairs custom functions
corColor <- function(data, mapping, color = I("black"), sizeRange = c(1, 3), ...) {

  x   <- eval_data_col(data, mapping$x)
  y   <- eval_data_col(data, mapping$y)
  r   <- cor(x, y)
  rt  <- format(r, digits = 3)
  tt  <- as.character(rt)
  cex <- max(sizeRange)

  # helper function to calculate a useable size
  percent_of_range <- function(percent, range) {
    percent * diff(range) + min(range, na.rm = TRUE)
  }

  # plot correlation coefficient
  p <- ggally_text(label = tt, mapping = aes(), xP = 0.5, yP = 0.5,
                   # size = I(percent_of_range(cex * abs(r), sizeRange)), 
                   size = 6, 
                   color = color, ...) +
    theme(panel.grid.minor=element_blank(),
          panel.grid.major=element_blank())

  corColors <- RColorBrewer::brewer.pal(n = 7, name = "RdYlBu")[2:6]

  if (r <= boundaries[1]) {
    corCol <- corColors[1]
  } else if (r <= boundaries[2]) {
    corCol <- corColors[2]
  } else if (r < boundaries[3]) {
    corCol <- corColors[3]
  } else if (r < boundaries[4]) {
    corCol <- corColors[4]
  } else {
    corCol <- corColors[5]
  }

  p <- p +
    theme(panel.background = element_rect(fill = corCol))

  return(p)
}

customScatter <- function (data, mapping) 
{
    p <- ggplot(data = data, mapping) + 
      geom_bin2d(bins = 100) +
      geom_smooth(method = "lm", se = T, col = "red") +
      scale_fill_gradient(low = "lightgrey", high = "black", name = "Count") +
      theme_bw()
    
    p 
}

PlotScatter <- function(tib, n1, n2, color_by = NULL, identity = F, smooth = 1) {
  # Get tibble
  tib <- tib %>%
    dplyr::select(seqnames, matches(n1), matches(n2)) %>%
    rename_all(~ c("seqnames", "n1", "n2"))
  
  if (smooth > 1) {
    tib <- tib %>%
      group_by(seqnames) %>%
      mutate(n1 = runmean(n1, k = smooth),
             n2 = runmean(n2, k = smooth))
  }
  
  # Prepare color
  if (! is.null(color_by)) {
    tib <- tib %>%
      add_column(color = color_by) %>%
      drop_na()
    alpha = 1
    limits_color <- quantile(tib$color, c(0.001, 0.999), na.rm = T)
    tib$color[tib$color < limits_color[1]] <- limits_color[1]
    tib$color[tib$color > limits_color[2]] <- limits_color[2]
  } else {
    tib <- tib %>% drop_na()
    tib$color = "1"
    alpha = 0.02
  }
  
  # Plot
  xlimits <- quantile(tib$n1, c(0.001, 0.999), na.rm = T) * 1.4
  ylimits <- quantile(tib$n2, c(0.001, 0.999), na.rm = T) * 1.4
  
  plt <- tib %>%
    arrange(sample(1:nrow(.), size = nrow(.), replace = F)) %>%
    ggplot(aes(x = n1, y = n2, color = color)) +
    geom_point(size = 0.5, alpha = alpha) +
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
    xlab(n1) +
    ylab(n2) +
    ggtitle(paste0("Spearman: ", 
                   round(cor(tib$n1, tib$n2, use = "complete.obs",
                             method = "spearman"), 2))) +
    coord_cartesian(xlim = xlimits, ylim = ylimits) +
    theme_bw() +
    theme(aspect.ratio = 1)
  
  # Prepare color
  if (! is.null(color_by)) {
    plt <- plt +
      scale_color_gradient2(low = "blue", mid = "grey", high = "red",
                            midpoint = 0)
  } else {
    plt <- plt + 
      scale_color_manual(values = "black", guide = "none")
  }
  if (identity) plt <- plt + geom_abline(slope = 1, intercept = 0, col = "red", linetype = "dashed")
  
  plot(plt)
  
}

PlotDataTracksLight <- function(input_tib, exp_names, centromeres, 
                                color_groups, plot_chr = "chr1", 
                                plot_start = 1, plot_end = 500e6,
                                smooth = 1, color_list = NULL,
                                fix_yaxis = F, aspect_ratio = NULL,
                                lighten_negative = T, raster = F,
                                ylimits = NULL) {
  
  # Get the scores for the samples
  tib_plot <- input_tib %>%
    dplyr::select(seqnames, start, end, 
                  all_of(exp_names))
  
  if (smooth > 1) {
    tib_plot <- tib_plot %>%
      mutate_at(vars(contains("_")), 
                runmean, k = smooth)
  }
  
  # Filter for plotting window
  tib_plot <- tib_plot %>%
    filter(seqnames == plot_chr,
           start >= plot_start,
           end <= plot_end)
  
  # Gather
  tib_gather <- tib_plot %>%
    gather(key, value, -seqnames, -start, -end) %>%
    mutate(key = factor(key, levels = exp_names),
           fill_column = color_groups[match(key, names(color_groups))],
           fill_column = factor(fill_column, levels = unique(color_groups)))
  
  # If desired, make negative values a lighter shade to improve visualization
  if (lighten_negative) {
    tib_gather <- tib_gather %>%
      mutate(fill_column = interaction(fill_column,
                                       value < 0))
  }
  
  
  # Centromeres
  centromeres.plt <- as_tibble(centromeres) %>%
    filter(seqnames == plot_chr) %>%
    gather(key_centromeres, value, start, end)
  
  # Plot
  if (is.null(ylimits)) {
    ylimits <- quantile(tib_gather$value, c(0.001, 0.999), na.rm = T)
  }
  
  fill_column <- NULL
  
  plt <- tib_gather %>% 
    ggplot(aes(fill = fill_column))
  
  
  # Set all counts tracks to the same limits
  if (raster) {
    plt <- plt + 
      rasterize(geom_rect(aes(xmin = start / 1e6, xmax = end / 1e6, 
                              ymin = 0, ymax = value)),
                dpi = 300)
  } else {
    plt <- plt + 
      geom_rect(aes(xmin = start / 1e6, xmax = end / 1e6, 
                    ymin = 0, ymax = value))
  }
  
  plt <- plt + 
    geom_hline(yintercept = 0, size = 0.5) +
    geom_line(data = centromeres.plt, 
              aes(x = value / 1e6, y = 0),
              color = "black", size = 2) +
    facet_grid(key ~ ., scales = "free_y") +
    xlab(paste0(plot_chr, " (Mb)")) +
    ylab("Score") +
    scale_x_continuous(expand = c(0, 0)) + 
    scale_y_continuous(expand = c(0.025, 0.025)) +
    theme_classic()
  
  if (! is.null(color_list)) {
    
    if (lighten_negative) {
      color_list <- c(color_list,
                      lighten(color_list, amount = 0.5))
    }
    
    colors <- levels(tib_gather$fill_column)
    
    color_list <- color_list[1:length(colors)]
    names(color_list) <- colors
    #colors <- recode(colors, !!!color_list)
    
    plt <- plt +
      scale_fill_manual(values = color_list, guide = "none")
  } else {
    plt <- plt +
      scale_fill_brewer(palette = "Set1", guide = "none")
  }
  
  if (fix_yaxis) {
    plt <- plt + 
      coord_cartesian(xlim = c(max(c(plot_start,
                                     min(tib_gather$start))) / 1e6,
                               min(c(plot_end,
                                     max(tib_gather$end))) / 1e6),
                      ylim = ylimits)
  } else {
    plt <- plt + 
      coord_cartesian(xlim = c(max(c(plot_start,
                                     min(tib_gather$start))) / 1e6,
                               min(c(plot_end,
                                     max(tib_gather$end))) / 1e6))
  }
  
  if (! is.null(aspect_ratio)) {
    plt <- plt +
      theme(aspect.ratio = aspect_ratio)
  }
  
  plot(plt)
  
}

# Similar data tracks as the function above, but with the functionality to plot
# matching data tracks on top of each other
PlotDataTracksLightFaceted <- function(input_tib, exp_names, centromeres, 
                                       color_groups, facet_groups,
                                       plot_chr = "chr1", size = 0.75,
                                       plot_start = 1, plot_end = 500e6,
                                       smooth = 1, color_list = NULL,
                                       fix_yaxis = F, scale_tracks = F, 
                                       aspect_ratio = NULL, raster = T, 
                                       ylimits = NULL, remove_NA_bins = F) {
  
  # # Exp names is a vector of sample names
  # exp_search <- paste(exp_names, collapse = "|")
  
  # Get the scores for the samples
  tib_plot <- input_tib %>%
    dplyr::select(seqnames, start, end, 
                  all_of(exp_names))
  
  if (smooth > 1) {
    tib_plot <- tib_plot %>%
      mutate_at(vars(contains("_")), 
                runmean, k = smooth)
  }
  
  # Remove NA bins
  if (remove_NA_bins) {
    idx_na <- rowMeans(is.na(tib_plot[, 4:ncol(tib_plot)])) > 0
    tib_plot[idx_na, 4:ncol(tib_plot)] <- NA
  }
  
  if (scale_tracks) {
    tib_plot <- tib_plot %>%
      mutate_at(vars(contains("_")), 
                function(x) scale(x)[, 1])
  }
  
  # Filter for plotting window
  tib_plot <- tib_plot %>%
    filter(seqnames == plot_chr,
           start >= plot_start,
           end <= plot_end)
  
  # Gather
  tib_gather <- tib_plot %>%
    gather(key, value, -seqnames, -start, -end) %>%
    mutate(key = factor(key, levels = exp_names),
           fill_column = color_groups[match(key, names(color_groups))],
           fill_column = factor(fill_column, levels = unique(color_groups)),
           facet_column = facet_groups[match(key, names(facet_groups))],
           facet_column = factor(facet_column, levels = unique(facet_groups)))
  
  
  # Centromeres
  centromeres.plt <- as_tibble(centromeres) %>%
    filter(seqnames == plot_chr) %>%
    gather(key_centromeres, value, start, end)
  
  # Plot
  if (is.null(ylimits)) {
    ylimits <- quantile(tib_gather$value, c(0.001, 0.999), na.rm = T)
  }
  
  fill_column <- NULL
  
  plt <- tib_gather %>% 
    ggplot(aes(fill = fill_column))
  
  
  # Set all counts tracks to the same limits
  plt <- plt + 
    #geom_rect(aes(xmin = start / 1e6, xmax = end / 1e6, 
    #              ymin = 0, ymax = value)) +
    geom_line(aes(x = (start+end)/2e6, y = value,
                  col = fill_column), size = size) +
    geom_hline(yintercept = 0, size = 0.5) +
    geom_line(data = centromeres.plt, 
              aes(x = value / 1e6, y = 0),
              color = "black", size = 2) +
    facet_grid(facet_column ~ ., scales = "free_y") +
    xlab(paste0(plot_chr, " (Mb)")) +
    ylab("Score") +
    scale_x_continuous(expand = c(0, 0)) + 
    scale_y_continuous(expand = c(0.025, 0.025)) +
    theme_classic()
  
  if (! is.null(color_list)) {
    colors <- levels(tib_gather$fill_column)
    
    color_list <- color_list[1:length(colors)]
    names(color_list) <- colors
    #colors <- recode(colors, !!!color_list)
    
    plt <- plt +
      scale_fill_manual(values = color_list, guide = "none") +
      scale_color_manual(values = color_list, guide = "none")
  } else {
    plt <- plt +
      scale_fill_brewer(palette = "Set1", guide = "none") +
      scale_color_brewer(palette = "Set1", guide = "none")
  }
  
  if (fix_yaxis) {
    plt <- plt + 
      coord_cartesian(xlim = c(max(c(plot_start,
                                     min(tib_gather$start))) / 1e6,
                               min(c(plot_end,
                                     max(tib_gather$end))) / 1e6),
                      ylim = ylimits)
  } else {
    plt <- plt + 
      coord_cartesian(xlim = c(max(c(plot_start,
                                     min(tib_gather$start))) / 1e6,
                               min(c(plot_end,
                                     max(tib_gather$end))) / 1e6))
  }
  
  if (! is.null(aspect_ratio)) {
    plt <- plt +
      theme(aspect.ratio = aspect_ratio)
  }
  
  plot(plt)
  
}

PlotScatterBinned <- function(tib, n1, n2, color_by = NULL, identity = F, 
                              n_min = 10, ylimits_col = c(-2.4, 2.4),
                              count_range = c(0, 400), smooth = 1,
                              remove_outliers = F, midpoint_col = 0,
                              linear_gradient = F) {
  # Get tibble
  tib_process <- tib %>%
    dplyr::select(seqnames, all_of(n1), all_of(n2)) %>%
    rename_all(~ c("seqnames", "n1", "n2"))
  
  # Calculate pearson correlation without any smoothing
  tib_cor <- round(cor(tib_process$n1, tib_process$n2, use = "complete.obs",
                       method = "pearson"), 2)
  
  if (smooth > 1) {
    tib_process <- tib_process %>%
      group_by(seqnames) %>%
      mutate(n1 = runmean(n1, k = smooth),
             n2 = runmean(n2, k = smooth))
  }
  
  
  if (! is.null(color_by)) {
    tib_process <- tib_process %>%
      add_column(color = color_by)
  }
  
  tib_process <- tib_process %>%
    drop_na()
  
  # Change color range
  if (! is.null(color_by)) {
    limits_color <- quantile(tib_process$color, c(0.001, 0.999), na.rm = T)
    tib_process$color[tib_process$color < limits_color[1]] <- limits_color[1]
    tib_process$color[tib_process$color > limits_color[2]] <- limits_color[2]
  }
  
  # Replace outliers
  if (remove_outliers) {
    tib_process <- tib_process %>%
      mutate(n1 = case_when(n1 > quantile(n1, 0.999) ~ quantile(n1, 0.999),
                            n1 < quantile(n1, 0.001) ~ quantile(n1, 0.001),
                            T ~ n1),
             n2 = case_when(n2 > quantile(n2, 0.999) ~ quantile(n2, 0.999),
                            n2 < quantile(n2, 0.001) ~ quantile(n2, 0.001),
                            T ~ n2))
  }
  
  # Metrics
  n1_min = min(tib_process$n1) - 0.001
  n1_max = max(tib_process$n1) + 0.001
  n1_binsize <- (n1_max - n1_min) / 49
  
  n2_min = min(tib_process$n2) - 0.001
  n2_max = max(tib_process$n2) + 0.001
  n2_binsize <- (n2_max - n2_min) / 49
  
  tib_summary <- tib_process %>%
    mutate(n1_cut = cut(n1, 
                        seq(n1_min, n1_max, length.out = 50)),
           n2_cut = cut(n2, 
                        seq(n2_min, n2_max, length.out = 50))) %>%
    mutate(n1_bin = as.numeric(as.factor(n1_cut)),
           n2_bin = as.numeric(as.factor(n2_cut))) %>%
    mutate(n1_bin = n1_min - n1_binsize/2 + n1_bin * n1_binsize,
           n2_bin = n2_min - n2_binsize/2 + n2_bin * n2_binsize) %>%
    group_by(n1_bin, n2_bin)
  
  # Plot
  if (! is.null(color_by)) {
    tib_summary <- tib_summary %>%
    dplyr::summarise(n = n(),
                     mark = mean(color)) %>%
    ungroup() %>%
    filter(n >= n_min)
    
    plt <- tib_summary %>%
      ggplot(aes(x = n1_bin, y = n2_bin)) +
      geom_tile(aes(fill = mark))
    
    if (linear_gradient) {
      plt <- plt +
        scale_fill_gradient(low = "darkred", high = "grey80",
                            limits = ylimits_col, 
                            na.value = "green")
    } else {
      plt <- plt +
        scale_fill_gradient2(low = "blue", mid = "grey", high = "red",
                             midpoint = midpoint_col, limits = ylimits_col, 
                             na.value = "green")
    }
      
  } else {
    tib_summary <- tib_summary %>%
    dplyr::summarise(n = n()) %>%
    ungroup() %>%
    filter(n >= n_min)
    
    plt <- tib_summary %>%
      ggplot(aes(x = n1_bin, y = n2_bin)) +
      geom_tile(aes(fill = n)) +
      scale_fill_gradient(low = "lightgrey", high = "black", name = "Count",
                          limits = count_range, na.value = "green")
  }
  
  plt <- plt + 
    geom_hline(yintercept = 0, linetype = "dashed", col = "black") +
    geom_vline(xintercept = 0, linetype = "dashed", col = "black") +
    xlab(n1) +
    ylab(n2) +
    ggtitle(paste0("Pearson: ", tib_cor)) +
    theme_bw() +
    theme(aspect.ratio = 1)
  
  if (identity) plt <- plt + geom_abline(slope = 1, intercept = 0, 
                                         col = "black", linetype = "dashed")
  
  plot(plt)
  
}

# PlotDataTracksLightFaceted <- function(input_tib, exp_names, centromeres, 
#                                        color_groups, facet_groups,
#                                        plot_chr = "chr1", 
#                                        plot_start = 1, plot_end = 500e6,
#                                        smooth = 1, color_list = NULL,
#                                        fix_yaxis = F, remove_NA_bins = F) {
#   
#   # Exp names is a vector of sample names
#   exp_search <- paste(exp_names, collapse = "|")
#   
#   # Get the scores for the samples
#   tib_plot <- input_tib %>%
#     dplyr::select(seqnames, start, end, 
#                   matches(exp_search))
#   
#   if (smooth > 1) {
#     tib_plot <- tib_plot %>%
#       mutate_at(vars(contains("_")), 
#                 runmean, k = smooth)
#   }
#   
#   # Remove NA bins
#   if (remove_NA_bins) {
#     idx_na <- rowMeans(is.na(tib_plot[, 4:ncol(tib_plot)])) > 0
#     tib_plot[idx_na, 4:ncol(tib_plot)] <- NA
#   }
#   
#   # Filter for plotting window
#   tib_plot <- tib_plot %>%
#     filter(seqnames == plot_chr,
#            start >= plot_start,
#            end <= plot_end)
#   
#   # Gather
#   tib_gather <- tib_plot %>%
#     gather(key, value, -seqnames, -start, -end) %>%
#     mutate(key = factor(key, levels = exp_names),
#            fill_column = color_groups[match(key, names(color_groups))],
#            fill_column = factor(fill_column, levels = unique(color_groups)),
#            facet_column = facet_groups[match(key, names(facet_groups))],
#            facet_column = factor(facet_column, levels = unique(facet_groups)))
#   
#   
#   # Centromeres
#   centromeres.plt <- as_tibble(centromeres) %>%
#     filter(seqnames == plot_chr) %>%
#     gather(key_centromeres, value, start, end)
#   
#   # Plot
#   ylimits <- quantile(tib_gather$value, c(0.001, 0.999), na.rm = T)
#   fill_column <- NULL
#   
#   plt <- tib_gather %>% 
#     ggplot(aes(fill = fill_column))
#   
#   
#   # Set all counts tracks to the same limits
#   plt <- plt + 
#     #geom_rect(aes(xmin = start / 1e6, xmax = end / 1e6, 
#     #              ymin = 0, ymax = value)) +
#     geom_line(aes(x = (start+end)/2e6, y = value,
#                   col = fill_column)) +
#     geom_line(data = centromeres.plt, 
#               aes(x = value / 1e6, y = 0),
#               color = "black", size = 2) +
#     geom_hline(yintercept = 0, size = 0.5) +
#     facet_grid(facet_column ~ ., scales = "free_y") +
#     xlab(paste0(plot_chr, " (Mb)")) +
#     ylab("Score") +
#     scale_x_continuous(expand = c(0, 0)) + 
#     scale_y_continuous(expand = c(0.025, 0.025)) +
#     theme_classic()
#   
#   if (! is.null(color_list)) {
#     colors <- levels(tib_gather$fill_column)
#     
#     color_list <- color_list[1:length(colors)]
#     names(color_list) <- colors
#     #colors <- recode(colors, !!!color_list)
#     
#     plt <- plt +
#       scale_fill_manual(values = color_list, guide = "none") +
#       scale_color_manual(values = color_list, guide = "none")
#   } else {
#     plt <- plt +
#       scale_fill_brewer(palette = "Set1", guide = "none") +
#       scale_color_brewer(palette = "Set1", guide = "none")
#   }
#   
#   if (fix_yaxis) {
#     plt <- plt + 
#       coord_cartesian(xlim = c(max(c(plot_start,
#                                      min(tib_gather$start))) / 1e6,
#                                min(c(plot_end,
#                                      max(tib_gather$end))) / 1e6),
#                       ylim = ylimits)
#   } else {
#     plt <- plt + 
#       coord_cartesian(xlim = c(max(c(plot_start,
#                                      min(tib_gather$start))) / 1e6,
#                                min(c(plot_end,
#                                      max(tib_gather$end))) / 1e6))
#   }
#   
#   plot(plt)
#   
# }

1. Prepare data

Determine differences, and load replication timing data.

# Get metadata
padamid_metadata_combined_aid <- padamid_metadata_combined %>%
  filter(experiment == "Ki67_aid") %>%
  filter(! grepl("control", condition),
         ! grepl("thymidine", condition))

# Get tibble
tib <- tib_padamid_combined_scaled %>%
  mutate(# RO vs double-thy
         RO_vs_doublethy_Ki67 = HCT116_AID_RO_Ki67 - HCT116_AID_doublethy_Ki67,
         RO_vs_doublethy_LMNB1 = HCT116_AID_RO_LMNB1 - HCT116_AID_doublethy_LMNB1,
         RO_vs_doublethy_H3K27me3 = HCT116_AID_RO_H3K27me3 - HCT116_AID_doublethy_H3K27me3,
         RO_vs_doublethy_H3K9me3 = HCT116_AID_RO_H3K9me3 - HCT116_AID_doublethy_H3K9me3,
         # IAA vs nonIAA, double-thy
         doublethy_LMNB1_diff = HCT116_AID_doublethy_IAA_LMNB1 - HCT116_AID_doublethy_LMNB1,
         doublethy_H3K27me3_diff = HCT116_AID_doublethy_IAA_H3K27me3 - HCT116_AID_doublethy_H3K27me3,
         doublethy_H3K9me3_diff = HCT116_AID_doublethy_IAA_H3K9me3 - HCT116_AID_doublethy_H3K9me3,
         # IAA vs nonIAA, RO
         RO_LMNB1_diff = HCT116_AID_RO_IAA_LMNB1 - HCT116_AID_RO_LMNB1,
         RO_H3K27me3_diff = HCT116_AID_RO_IAA_H3K27me3 - HCT116_AID_RO_H3K27me3,
         RO_H3K9me3_diff = HCT116_AID_RO_IAA_H3K9me3 - HCT116_AID_RO_H3K9me3)

# Smoothing of differences?
smooth_differences = F

if (smooth_differences) {
  tib <- tib %>%
    group_by(seqnames) %>%
    mutate_at(vars(contains("vs"), contains("diff")), 
              function(x) runmean(x, k = 3)) %>%
    ungroup()
}

ts210804 - using replication timing data mapped by myself, rather than the data mapped by Ethan.

# Read replication timing
input_dir <- "../ts210607_repliseq_K562_LaminKO_HCT116_Ki67AID/ts210802_further_processing_replication_timing/"

rt_metadata <- readRDS(file.path(input_dir,
                                 "tib_repliseq_metadata.rds"))
tib_rt <- readRDS(file.path(input_dir,
                                 "tib_norm_average.rds")) %>%
  rename_at(4:5, ~ c("minus_IAA", "plus_IAA")) %>%
  mutate(rt_diff = plus_IAA - minus_IAA)


# Add to tibble
tib <- left_join(tib, tib_rt)
## Joining, by = c("seqnames", "start", "end")
# Add distance to centromeres
dis <- distanceToNearest(as(tib, "GRanges"), centromeres)
tib$distance_to_centromere <- mcols(dis)$distance / 1e6
# Simple data tracks - for double-thy & RO
PlotDataTracksLight(input_tib = tib, 
                    exp_names = c("HCT116_AID_doublethy_Ki67",
                                  "HCT116_AID_doublethy_LMNB1",
                                  "HCT116_AID_doublethy_IAA_LMNB1",
                                  "minus_IAA",
                                  "plus_IAA"),
                    centromeres,
                    color_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
                                     HCT116_AID_doublethy_LMNB1 = "LMNB1",
                                     HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1",
                                     minus_IAA = "rt",
                                     plus_IAA = "rt"),
                    smooth = 9, plot_chr = "chr3", fix_yaxis = T,
                    plot_start = 25e6, plot_end = 100e6,
                    color_list = colors_set1[c(1:5, 7)])
## Warning: Removed 116 rows containing missing values (geom_rect).

PlotDataTracksLight(input_tib = tib,  
                    exp_names = c("HCT116_AID_doublethy_Ki67",
                                  "HCT116_AID_doublethy_LMNB1",
                                  "HCT116_AID_doublethy_IAA_LMNB1",
                                  "minus_IAA",
                                  "plus_IAA"),
                    centromeres,
                    color_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
                                     HCT116_AID_doublethy_LMNB1 = "LMNB1",
                                     HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1",
                                     minus_IAA = "rt",
                                     plus_IAA = "rt"),
                    smooth = 9, plot_chr = "chr11", fix_yaxis = T,
                    plot_start = 25e6, plot_end = 100e6,
                    color_list = colors_set1[c(1:5, 7)])
## Warning: Removed 187 rows containing missing values (geom_rect).

# Faceted plots
PlotDataTracksLightFaceted(input_tib = tib, 
                           exp_names = c("HCT116_AID_doublethy_Ki67",
                                         "HCT116_AID_doublethy_LMNB1",
                                         "HCT116_AID_doublethy_IAA_LMNB1",
                                         "minus_IAA",
                                         "plus_IAA"),
                           centromeres,
                           color_groups = c(HCT116_AID_doublethy_Ki67 = "minusIAA",
                                            HCT116_AID_doublethy_LMNB1 = "minusIAA",
                                            HCT116_AID_doublethy_IAA_LMNB1 = "plusIAA",
                                            minus_IAA = "minusIAA",
                                            plus_IAA = "plusIAA"),
                           smooth = 9, plot_chr = "chr3", fix_yaxis = F,
                           plot_start = 60e6, plot_end = 110e6,
                           color_list = c("grey20", "red"),
                           facet_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
                                            HCT116_AID_doublethy_LMNB1 = "LMNB1",
                                            HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1",
                                            minus_IAA = "rt",
                                            plus_IAA = "rt"),
                           remove_NA_bins = T)

PlotDataTracksLightFaceted(input_tib = tib, 
                           exp_names = c("HCT116_AID_doublethy_Ki67",
                                         "HCT116_AID_doublethy_LMNB1",
                                         "HCT116_AID_doublethy_IAA_LMNB1",
                                         "minus_IAA",
                                         "plus_IAA"),
                           centromeres,
                           color_groups = c(HCT116_AID_doublethy_Ki67 = "minusIAA",
                                            HCT116_AID_doublethy_LMNB1 = "minusIAA",
                                            HCT116_AID_doublethy_IAA_LMNB1 = "plusIAA",
                                            minus_IAA = "minusIAA",
                                            plus_IAA = "plusIAA"),
                           smooth = 9, plot_chr = "chr15", fix_yaxis = F,
                           plot_start = 18e6, plot_end = 91e6,
                           color_list = c("grey20", "red"),
                           facet_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
                                            HCT116_AID_doublethy_LMNB1 = "LMNB1",
                                            HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1",
                                            minus_IAA = "rt",
                                            plus_IAA = "rt"),
                           remove_NA_bins = T)
## Warning: Removed 64 row(s) containing missing values (geom_path).

PlotDataTracksLightFaceted(input_tib = tib, 
                           exp_names = c("HCT116_AID_doublethy_Ki67",
                                         "HCT116_AID_doublethy_LMNB1",
                                         "HCT116_AID_doublethy_IAA_LMNB1",
                                         "minus_IAA",
                                         "plus_IAA"),
                           centromeres,
                           color_groups = c(HCT116_AID_doublethy_Ki67 = "minusIAA",
                                            HCT116_AID_doublethy_LMNB1 = "minusIAA",
                                            HCT116_AID_doublethy_IAA_LMNB1 = "plusIAA",
                                            minus_IAA = "minusIAA",
                                            plus_IAA = "plusIAA"),
                           smooth = 9, plot_chr = "chr12", fix_yaxis = F,
                           plot_start = 11e6, plot_end = 48e6,
                           color_list = c("grey20", "red"),
                           facet_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
                                            HCT116_AID_doublethy_LMNB1 = "LMNB1",
                                            HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1",
                                            minus_IAA = "rt",
                                            plus_IAA = "rt"),
                           remove_NA_bins = T)

PlotDataTracksLightFaceted(input_tib = tib, 
                           exp_names = c("HCT116_AID_doublethy_Ki67",
                                         "HCT116_AID_doublethy_LMNB1",
                                         "HCT116_AID_doublethy_IAA_LMNB1",
                                         "minus_IAA",
                                         "plus_IAA"),
                           centromeres,
                           color_groups = c(HCT116_AID_doublethy_Ki67 = "minusIAA",
                                            HCT116_AID_doublethy_LMNB1 = "minusIAA",
                                            HCT116_AID_doublethy_IAA_LMNB1 = "plusIAA",
                                            minus_IAA = "minusIAA",
                                            plus_IAA = "plusIAA"),
                           smooth = 9, plot_chr = "chr12", fix_yaxis = F,
                           plot_start = 21e6, plot_end = 45e6,
                           color_list = c("grey20", "red"),
                           facet_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
                                            HCT116_AID_doublethy_LMNB1 = "LMNB1",
                                            HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1",
                                            minus_IAA = "rt",
                                            plus_IAA = "rt"),
                           remove_NA_bins = T)

PlotDataTracksLightFaceted(input_tib = tib %>%
                             mutate_at(c("plus_IAA", "minus_IAA"),
                                       function(x) scale(x)[, 1]) %>%
                             mutate(diff_LMNB1 = HCT116_AID_doublethy_IAA_LMNB1 -
                                      HCT116_AID_doublethy_LMNB1,
                                    diff_rt = plus_IAA -
                                      minus_IAA), 
                           exp_names = c("HCT116_AID_doublethy_Ki67",
                                         "diff_LMNB1",
                                         "diff_rt"),
                           centromeres,
                           color_groups = c(HCT116_AID_doublethy_Ki67 = "minusIAA",
                                            diff_LMNB1 = "diff_LMNB1",
                                            diff_rt = "diff_rt"),
                           smooth = 9, plot_chr = "chr12", fix_yaxis = F,
                           plot_start = 21e6, plot_end = 45e6,
                           color_list = c("grey20", "red", "purple"),
                           facet_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
                                            diff_LMNB1 = "diff",
                                            diff_rt = "diff"),
                           remove_NA_bins = T)

PlotDataTracksLightFaceted(input_tib = tib, 
                           exp_names = c("HCT116_AID_doublethy_Ki67",
                                         "HCT116_AID_doublethy_LMNB1",
                                         "HCT116_AID_doublethy_IAA_LMNB1",
                                         "minus_IAA",
                                         "plus_IAA"),
                           centromeres,
                           color_groups = c(HCT116_AID_doublethy_Ki67 = "minusIAA",
                                            HCT116_AID_doublethy_LMNB1 = "minusIAA",
                                            HCT116_AID_doublethy_IAA_LMNB1 = "plusIAA",
                                            minus_IAA = "minusIAA",
                                            plus_IAA = "plusIAA"),
                           smooth = 9, plot_chr = "chr12", fix_yaxis = F,
                           plot_start = 11e6, plot_end = 48e6,
                           color_list = c("grey20", "red"),
                           facet_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
                                            HCT116_AID_doublethy_LMNB1 = "LMNB1",
                                            HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1",
                                            minus_IAA = "rt",
                                            plus_IAA = "rt"),
                           remove_NA_bins = F)

PlotDataTracksLightFaceted(input_tib = tib, 
                           exp_names = c("HCT116_AID_doublethy_Ki67",
                                         "HCT116_AID_doublethy_LMNB1",
                                         "HCT116_AID_doublethy_IAA_LMNB1",
                                         "minus_IAA",
                                         "plus_IAA"),
                           centromeres,
                           color_groups = c(HCT116_AID_doublethy_Ki67 = "minusIAA",
                                            HCT116_AID_doublethy_LMNB1 = "minusIAA",
                                            HCT116_AID_doublethy_IAA_LMNB1 = "plusIAA",
                                            minus_IAA = "minusIAA",
                                            plus_IAA = "plusIAA"),
                           smooth = 9, plot_chr = "chr22", fix_yaxis = F,
                           #plot_start = 20e6, plot_end = 91e6,
                           color_list = c("grey20", "red"),
                           facet_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
                                            HCT116_AID_doublethy_LMNB1 = "LMNB1",
                                            HCT116_AID_doublethy_IAA_LMNB1 = "LMNB1",
                                            minus_IAA = "rt",
                                            plus_IAA = "rt"),
                           remove_NA_bins = T)

for (chr in chromosomes) {
  # doublethy
  PlotDataTracksLight(input_tib = tib, 
                      exp_names = c("HCT116_AID_doublethy_Ki67",
                                    "HCT116_AID_doublethy_LMNB1",
                                    "doublethy_LMNB1_diff",
                                    "minus_IAA",
                                    "rt_diff"), 
                      centromeres,
                      color_groups = c(HCT116_AID_doublethy_Ki67 = "Ki67",
                                       HCT116_AID_doublethy_LMNB1 = "LMNB1",
                                       doublethy_LMNB1_diff = "LMNB1",
                                       minus_IAA = "rt",
                                       rt_diff = "rt"),
                      smooth = 15, plot_chr = chr, fix_yaxis = F,
                      color_list = c(colors_set1[c(1:2)], "grey20"))
}
## Warning: Removed 1734 rows containing missing values (geom_rect).

## Warning: Removed 109 rows containing missing values (geom_rect).

## Warning: Removed 87 rows containing missing values (geom_rect).

## Warning: Removed 87 rows containing missing values (geom_rect).

## Warning: Removed 178 rows containing missing values (geom_rect).

## Warning: Removed 63 rows containing missing values (geom_rect).

## Warning: Removed 123 rows containing missing values (geom_rect).

## Warning: Removed 63 rows containing missing values (geom_rect).

## Warning: Removed 1570 rows containing missing values (geom_rect).

## Warning: Removed 87 rows containing missing values (geom_rect).

## Warning: Removed 173 rows containing missing values (geom_rect).

## Warning: Removed 111 rows containing missing values (geom_rect).

## Warning: Removed 1592 rows containing missing values (geom_rect).

## Warning: Removed 1665 rows containing missing values (geom_rect).

## Warning: Removed 1765 rows containing missing values (geom_rect).

## Warning: Removed 852 rows containing missing values (geom_rect).

## Warning: Removed 192 rows containing missing values (geom_rect).

## Warning: Removed 287 rows containing missing values (geom_rect).

## Warning: Removed 178 rows containing missing values (geom_rect).

## Warning: Removed 87 rows containing missing values (geom_rect).

## Warning: Removed 596 rows containing missing values (geom_rect).

## Warning: Removed 1160 rows containing missing values (geom_rect).

## Warning: Removed 278 rows containing missing values (geom_rect).

2. Simple correlations

# Scatter plot as before
tib_dropna <- tib %>% drop_na(HCT116_AID_doublethy_Ki67)

PlotScatterBinned(tib_dropna, smooth = 3, identity = T,
                  n1 = "minus_IAA", 
                  n2 = "plus_IAA", count_range = c(0, 1500))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_dropna, smooth = 3, identity = T,
                  n1 = "minus_IAA", 
                  n2 = "plus_IAA",
                  color_by = tib_dropna$HCT116_AID_doublethy_Ki67, ylimits_col = c(-2.8, 2.8))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_dropna, smooth = 3, identity = T,
                  n1 = "minus_IAA", 
                  n2 = "plus_IAA",
                  color_by = tib_dropna$distance_to_centromere, ylimits_col = c(0, 80),
                  midpoint_col = 40)
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_dropna, smooth = 3, identity = T,
                  n1 = "minus_IAA", 
                  n2 = "plus_IAA",
                  color_by = log10(tib_dropna$distance_to_centromere+0.001), 
                  ylimits_col = c(-1, 2.4), midpoint_col = 0.7)
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_dropna, smooth = 3, identity = T,
                  n1 = "minus_IAA", 
                  n2 = "plus_IAA",
                  color_by = log10(tib_dropna$distance_to_centromere+1), 
                  ylimits_col = c(0, 2.4), midpoint_col = 1.2)
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_dropna, smooth = 3, 
                  n1 = "distance_to_centromere", 
                  n2 = "rt_diff")
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

# Correlation as before
tib_dropna %>%
  # Smoothing of 3 bins?
  mutate_at(vars(contains("vs"), contains("diff")), 
              function(x) runmean(x, k = 3)) %>%
  dplyr::summarise(cor_rt = cor(HCT116_AID_doublethy_Ki67, rt_diff, 
                                use = "complete.obs"))
## # A tibble: 1 × 1
##   cor_rt
##    <dbl>
## 1 0.0838
# Plot chromosomal differences
tib_dropna %>%
  group_by(seqnames) %>%
  dplyr::summarise(rt = mean(rt_diff, na.rm = T)) %>%
  ungroup() %>%
  mutate(seqnames = factor(seqnames, chrom_sizes$seqnames),
         rdna = seqnames %in% paste0("chr", c(13:15, 21:22))) %>%
  ggplot(aes(x = seqnames, y = rt, col = rdna, shape = rdna)) +
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  geom_point(size = 2) +
  ylab("Difference replication timing") +
  scale_color_manual(values = c("grey20", "red")) +
  theme_bw() +
  theme(aspect.ratio = 1,
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

3. Centromeres

# Summary per chromosome
tib_summary <- tib_dropna %>%
  filter(distance_to_centromere < 2) %>%
  gather(key, value, contains("rt_diff")) %>%
  group_by(seqnames, key) %>%
  summarise(mean = mean(value, na.rm = T)) %>%
  ungroup() %>%
  mutate(seqnames = factor(seqnames, chrom_sizes$seqnames),
         rdna = seqnames %in% paste0("chr", c(13, 14, 15, 21, 22))) %>%
  mutate(size = chrom_sizes$length[match(seqnames, 
                                         chrom_sizes$seqnames)],
         size = size / 1e6)
## `summarise()` has grouped output by 'seqnames'. You can override using the `.groups` argument.
# Plot
tib_summary %>%
  ggplot(aes(x = seqnames, y = mean, col = rdna, shape = rdna)) +
  geom_point(size = 2) +
  geom_hline(yintercept = 0, col = "black") +
  xlab("") +
  ylab("Replication timing difference near centromeres") +
  scale_color_manual(values = c("grey20", "red")) +
  theme_bw() +
  theme(aspect.ratio = 1,
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Make boxplots
tib_boxplots <- tib_dropna %>%
  mutate(rdna = seqnames %in% paste0("chr", c(13:15, 21:22))) %>%
  dplyr::select(seqnames, rdna, contains("distance"), rt_diff) %>%
  mutate(dis_group = as.numeric(cut(distance_to_centromere, 
                                    breaks = seq(0, max(distance_to_centromere) + 1, 
                                                 by = 0.5)))/2 - 0.5)

# Plot as boxplots
# First, calculate boxplots to have more control over ylimits
tib_summary <- tib_boxplots %>%
  #group_by(seqnames, rdna, dis_group) %>%
  group_by(dis_group) %>%
  drop_na() %>%
  summarise(ymin = quantile(rt_diff, 0.05), 
            lower = quantile(rt_diff, 0.25), 
            middle = quantile(rt_diff, 0.5), 
            upper = quantile(rt_diff, 0.75), 
            ymax = quantile(rt_diff, 0.95))

tib_summary %>%
  filter(dis_group <= 20) %>%
  ggplot(aes(x = dis_group, ymin = ymin, lower = lower, middle = middle, 
             upper = upper, ymax = ymax, group = dis_group, y = middle)) +
  geom_boxplot(stat="identity", outlier.shape = NA, fill = "lightgrey", col = "black") +
  geom_line(aes(y = middle, group = NULL), col = "red") +
  geom_hline(yintercept = 0, col = "blue", linetype = "dashed") +
  #facet_grid(. ~ seqnames, scales = "free_y") + 
  xlab("Distance to centromere (Mb)") +
  ylab("Difference in RT") +
  #coord_cartesian(xlim = c(0, 30)) +
  theme_bw() +
  theme(aspect.ratio = 1)

# Repeat, for LaminB1 interactions
tib_summary <- tib_dropna %>%
  filter(distance_to_centromere < 2) %>%
  gather(key, value, contains("doublethy_LMNB1_diff")) %>%
  group_by(seqnames, key) %>%
  summarise(mean = mean(value, na.rm = T)) %>%
  ungroup() %>%
  mutate(seqnames = factor(seqnames, chrom_sizes$seqnames),
         rdna = seqnames %in% paste0("chr", c(13, 14, 15, 21, 22))) %>%
  mutate(size = chrom_sizes$length[match(seqnames, 
                                         chrom_sizes$seqnames)],
         size = size / 1e6)
## `summarise()` has grouped output by 'seqnames'. You can override using the `.groups` argument.
# Plot
tib_summary %>%
  ggplot(aes(x = seqnames, y = mean, col = rdna, shape = rdna)) +
  geom_point(size = 2) +
  geom_hline(yintercept = 0, col = "black") +
  xlab("") +
  ylab("Lamin B1 difference near centromeres") +
  scale_color_manual(values = c("grey20", "red")) +
  theme_bw() +
  theme(aspect.ratio = 1,
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

tib_boxplots <- tib_dropna %>%
  mutate(rdna = seqnames %in% paste0("chr", c(13:15, 21:22))) %>%
  dplyr::select(seqnames, rdna, contains("distance"), doublethy_LMNB1_diff) %>%
  mutate(dis_group = as.numeric(cut(distance_to_centromere, 
                                    breaks = seq(0, max(distance_to_centromere) + 1, 
                                                 by = 0.5)))/2 - 0.5)

# Plot as boxplots
# First, calculate boxplots to have more control over ylimits
tib_summary <- tib_boxplots %>%
  #group_by(seqnames, rdna, dis_group) %>%
  group_by(dis_group) %>%
  drop_na() %>%
  summarise(ymin = quantile(doublethy_LMNB1_diff, 0.05), 
            lower = quantile(doublethy_LMNB1_diff, 0.25), 
            middle = quantile(doublethy_LMNB1_diff, 0.5), 
            upper = quantile(doublethy_LMNB1_diff, 0.75), 
            ymax = quantile(doublethy_LMNB1_diff, 0.95))

tib_summary %>%
  filter(dis_group <= 20) %>%
  ggplot(aes(x = dis_group, ymin = ymin, lower = lower, middle = middle, 
             upper = upper, ymax = ymax, group = dis_group, y = middle)) +
  geom_boxplot(stat="identity", outlier.shape = NA, fill = "lightgrey", col = "black") +
  geom_line(aes(y = middle, group = NULL), col = "red") +
  geom_hline(yintercept = 0, col = "blue", linetype = "dashed") +
  #facet_grid(. ~ seqnames, scales = "free_y") + 
  xlab("Distance to centromere (Mb)") +
  ylab("Difference in Lamin B1") +
  #coord_cartesian(xlim = c(0, 30)) +
  theme_bw() +
  theme(aspect.ratio = 1)

4. Replication timing vs Ki67 scores

I also want to include a more basic figure - how does Ki67 correlate with replication timing? And how does this compare with LaminB1?

# Make simple scatter plots first
PlotScatterBinned(tib, "HCT116_AID_doublethy_Ki67", "minus_IAA")
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib, "HCT116_AID_doublethy_LMNB1", "minus_IAA")
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib, "HCT116_AID_doublethy_Ki67", "HCT116_AID_doublethy_LMNB1", 
                  color_by = tib$minus_IAA, ylimits_col = c(-4.5, 4.5))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

I also want to make this figure in the wildtype cells - can I do that?

# Load replication from wt cells
# Read RDS file
repliseq <- readRDS("../ts191220_laminaVsNucleolus_NewAnalyses/ts200116_ProcessRepliseq/GRanges_repliseq_mean.rds")

# # Invert to L/E?
# mcols(repliseq) <- as_tibble(mcols(repliseq)) %>% 
#   mutate_all(function(x) -x)

# Find overlaps with DamID bins
gr_repliseq <- as(tib_padamid_combined, "GRanges")
ovl <- findOverlaps(gr_repliseq, repliseq, select = "arbitrary")

mcols(gr_repliseq) <- mcols(repliseq)[ovl, ]
names(mcols(gr_repliseq)) <- paste0("repliseq_",
                                    names(mcols(gr_repliseq)))

# Back to tibble
tib_repliseq <- as_tibble(gr_repliseq)

# Join with tib 
tib <- full_join(tib_padamid_combined, tib_repliseq)
## Joining, by = c("seqnames", "start", "end")
# Plot
PlotScatterBinned(tib, "RPE_wt_Ki67", "RPE_wt_LMNB1", 
                  color_by = tib$repliseq_RPE.hTERT, ylimits_col = c(-4.5, 4.5))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib, "HCT116_wt_Ki67", "HCT116_wt_LMNB1", 
                  color_by = tib$repliseq_HCT116, ylimits_col = c(-4.5, 4.5))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib, "K562_wt_Ki67", "K562_wt_LMNB1", 
                  color_by = tib$repliseq_K562, ylimits_col = c(-4.5, 4.5))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib, "K562_wt_Ki67", "K562_wt_LMNB1", 
                  color_by = tib$repliseq_K562, ylimits_col = c(-2.5, 2.5))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

Also, create some example tracks for the manuscript of these figures.

# Plot
PlotDataTracksLight(input_tib = tib, 
                    exp_names = c("RPE_wt_Ki67",
                                  "RPE_wt_LMNB1",
                                  "repliseq_RPE.hTERT"),
                    centromeres, 
                    color_groups = c(RPE_wt_Ki67 = "Ki67",
                                     RPE_wt_LMNB1 = "LMNB1",
                                     repliseq_RPE.hTERT = "rt"),
                    smooth = 9, plot_chr = "chr11", plot_end = 65e6,
                    fix_yaxis = F,
                    color_list = c(colors_set1[c(1:2)], "grey50"))
## Warning: Removed 165 rows containing missing values (geom_rect).

PlotDataTracksLight(input_tib = tib, 
                    exp_names = c("HCT116_wt_Ki67",
                                  "HCT116_wt_LMNB1",
                                  "repliseq_HCT116"),
                    centromeres,
                    color_groups = c(HCT116_wt_Ki67 = "Ki67",
                                     HCT116_wt_LMNB1 = "LMNB1",
                                     repliseq_HCT116 = "rt"),
                    smooth = 9, plot_chr = "chr11", plot_end = 65e6,
                    fix_yaxis = F,
                    color_list = c(colors_set1[c(1:2)], "grey50"))
## Warning: Removed 142 rows containing missing values (geom_rect).

PlotDataTracksLight(input_tib = tib, 
                    exp_names = c("K562_wt_Ki67",
                                  "K562_wt_LMNB1",
                                  "repliseq_K562"),
                    centromeres,
                    color_groups = c(K562_wt_Ki67 = "Ki67",
                                     K562_wt_LMNB1 = "LMNB1",
                                     repliseq_K562 = "rt"),
                    smooth = 9, plot_chr = "chr11", plot_end = 65e6,
                    fix_yaxis = F,
                    color_list = c(colors_set1[c(1:2)], "grey50"))
## Warning: Removed 174 rows containing missing values (geom_rect).

5. Gene expression versus Ki67 scores

Repeat the exercise from above with gene expression scores.

# Load the genes + results - and combine
genes <- readRDS("../ts210514_RNAseq_Ki67_depletion/ts210514_rnaseq_processing/genes.rds")
tib_fpkm <- readRDS("../ts210514_RNAseq_Ki67_depletion/ts210514_rnaseq_processing/genes_fpkm.rds")

tib_fpkm <- full_join(tib_fpkm, 
                      as_tibble(genes)) %>%
  dplyr::select(seqnames, start, end, strand, gene_id, DMSO_expr, IAA_expr) %>%
  mutate(start = start - 5e4,
         end = end + 5e4)
## Joining, by = "gene_id"
# Calculate gene interaction scores
tib_combined <- as_tibble(findOverlaps(as(tib_fpkm, "GRanges"),
                                       as(tib, "GRanges"),
                                       ignore.strand = T)) %>%
  mutate(Ki67 = tib$HCT116_AID_doublethy_Ki67[subjectHits],
         LMNB1 = tib$HCT116_AID_doublethy_LMNB1[subjectHits]) %>%
  group_by(queryHits) %>%
  dplyr::summarise(Ki67 = mean(Ki67, na.rm = T),
                   LMNB1 = mean(LMNB1, na.rm = T)) %>%
  ungroup() %>%
  mutate(seqnames = tib_fpkm$seqnames[queryHits],
         gene = tib_fpkm$gene_id[queryHits],
         DMSO_expr = tib_fpkm$DMSO_expr[queryHits],
         DMSO_log2 = log2(DMSO_expr + 1))
  

# Make simple scatter plots first
PlotScatterBinned(tib_combined, "Ki67", "DMSO_log2")
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_combined, "LMNB1", "DMSO_log2")
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_combined, "Ki67", "LMNB1", 
                  color_by = tib_combined$DMSO_log2, ylimits_col = c(-0.5, 2.9),
                  midpoint_col = 1.2)
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

# Load the genes + results - and combine
genes <- readRDS("../ts191220_laminaVsNucleolus_NewAnalyses/ts200113_GeneExpression/genes.rds")
tib_fpkm <- readRDS("../ts191220_laminaVsNucleolus_NewAnalyses/ts200113_GeneExpression/tib_fpkm.rds")

tib_fpkm <- full_join(tib_fpkm, 
                      as_tibble(genes)) %>%
  dplyr::select(seqnames, start, end, strand, gene_id, RPE_expr, HCT116_expr, K562_expr) %>%
  mutate(start = start - 5e4,
         end = end + 5e4)
## Joining, by = "gene_id"
# Calculate gene interaction scores
tib_combined <- as_tibble(findOverlaps(as(tib_fpkm, "GRanges"),
                                       as(tib, "GRanges"),
                                       ignore.strand = T)) %>%
  mutate(RPE_Ki67 = tib$RPE_wt_Ki67[subjectHits],
         RPE_LMNB1 = tib$RPE_wt_LMNB1[subjectHits],
         HCT116_Ki67 = tib$HCT116_wt_Ki67[subjectHits],
         HCT116_LMNB1 = tib$HCT116_wt_LMNB1[subjectHits],
         K562_Ki67 = tib$K562_wt_Ki67[subjectHits],
         K562_LMNB1 = tib$K562_wt_LMNB1[subjectHits]) %>%
  group_by(queryHits) %>%
  dplyr::summarise(RPE_Ki67 = mean(RPE_Ki67, na.rm = T),
                   RPE_LMNB1 = mean(RPE_LMNB1, na.rm = T),
                   HCT116_Ki67 = mean(HCT116_Ki67, na.rm = T),
                   HCT116_LMNB1 = mean(HCT116_LMNB1, na.rm = T),
                   K562_Ki67 = mean(K562_Ki67, na.rm = T),
                   K562_LMNB1 = mean(K562_LMNB1, na.rm = T)) %>%
  ungroup() %>%
  mutate(seqnames = tib_fpkm$seqnames[queryHits],
         gene = tib_fpkm$gene_id[queryHits],
         RPE_expr = log2(tib_fpkm$RPE_expr[queryHits]+1),
         HCT116_expr = log2(tib_fpkm$HCT116_expr[queryHits]+1),
         K562_expr = log2(tib_fpkm$K562_expr[queryHits]+1))
  

# Make simple scatter plots 
PlotScatterBinned(tib_combined, "RPE_Ki67", "RPE_LMNB1", 
                  color_by = tib_combined$RPE_expr, ylimits_col = c(-0.5, 2.9),
                  midpoint_col = 1.2)
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_combined, "HCT116_Ki67", "HCT116_LMNB1", 
                  color_by = tib_combined$HCT116_expr, ylimits_col = c(-0.5, 2.9),
                  midpoint_col = 1.2)
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_combined, "K562_Ki67", "K562_LMNB1", 
                  color_by = tib_combined$K562_expr, ylimits_col = c(-0.5, 2.9),
                  midpoint_col = 1.2)
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

Conclusions

Session info

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] knitr_1.33           caTools_1.18.2       corrr_0.4.3         
##  [4] GGally_2.1.2         RColorBrewer_1.1-2   rtracklayer_1.50.0  
##  [7] GenomicRanges_1.42.0 GenomeInfoDb_1.26.7  IRanges_2.24.1      
## [10] S4Vectors_0.28.1     BiocGenerics_0.36.1  forcats_0.5.1       
## [13] stringr_1.4.0        dplyr_1.0.7          purrr_0.3.4         
## [16] readr_2.0.0          tidyr_1.1.3          tibble_3.1.6        
## [19] ggplot2_3.3.5        tidyverse_1.3.1     
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7                matrixStats_0.60.0         
##  [3] fs_1.5.0                    lubridate_1.7.10           
##  [5] httr_1.4.2                  tools_4.0.5                
##  [7] backports_1.2.1             bslib_0.2.5.1              
##  [9] utf8_1.2.2                  R6_2.5.1                   
## [11] DBI_1.1.1                   colorspace_2.0-2           
## [13] withr_2.4.2                 tidyselect_1.1.1           
## [15] compiler_4.0.5              cli_3.1.0                  
## [17] rvest_1.0.1                 Biobase_2.50.0             
## [19] xml2_1.3.2                  DelayedArray_0.16.3        
## [21] labeling_0.4.2              sass_0.4.0                 
## [23] scales_1.1.1                digest_0.6.28              
## [25] Rsamtools_2.6.0             rmarkdown_2.11             
## [27] XVector_0.30.0              pkgconfig_2.0.3            
## [29] htmltools_0.5.1.1           MatrixGenerics_1.2.1       
## [31] highr_0.9                   dbplyr_2.1.1               
## [33] rlang_0.4.12                readxl_1.3.1               
## [35] rstudioapi_0.13             farver_2.1.0               
## [37] jquerylib_0.1.4             generics_0.1.0             
## [39] jsonlite_1.7.2              BiocParallel_1.24.1        
## [41] RCurl_1.98-1.3              magrittr_2.0.1             
## [43] GenomeInfoDbData_1.2.4      Matrix_1.3-4               
## [45] Rcpp_1.0.7                  munsell_0.5.0              
## [47] fansi_0.5.0                 lifecycle_1.0.1            
## [49] stringi_1.7.3               yaml_2.2.1                 
## [51] SummarizedExperiment_1.20.0 zlibbioc_1.36.0            
## [53] plyr_1.8.6                  grid_4.0.5                 
## [55] crayon_1.4.2                lattice_0.20-44            
## [57] Biostrings_2.58.0           haven_2.4.1                
## [59] hms_1.1.0                   pillar_1.6.4               
## [61] codetools_0.2-18            reprex_2.0.0               
## [63] XML_3.99-0.6                glue_1.5.0                 
## [65] evaluate_0.14               modelr_0.1.8               
## [67] vctrs_0.3.8                 tzdb_0.1.2                 
## [69] cellranger_1.1.0            gtable_0.3.0               
## [71] reshape_0.8.8               assertthat_0.2.1           
## [73] xfun_0.24                   broom_0.7.9                
## [75] GenomicAlignments_1.26.0    ellipsis_0.3.2